# Load the data ---------------------------------------------------------------
# Expected shape (from a previous run): 470 rows x 22 cols —
# Variant (chr), loanofficer_id (chr), and 20 numeric daily metrics.
# show_col_types = FALSE silences readr's column-spec message, as the
# message itself suggests.
df <- read_csv("ADAproject_2025_data.csv", show_col_types = FALSE)
# Open the data viewer only in interactive sessions; it is pointless
# (and potentially noisy) when the script is rendered non-interactively.
if (interactive()) view(df)
# Show the distribution of each numeric variable, split by Variant.
# Select numeric columns by type instead of by position (the original
# `3:length(names(df))` silently breaks if the column order changes).
plot_list_1 <- list()
numeric_columns_1 <- names(df)[vapply(df, is.numeric, logical(1))]
for (metric in numeric_columns_1) {
  p <- ggplot(df, aes(x = .data[[metric]], fill = Variant)) +
    geom_density(alpha = 0.5) +
    labs(title = paste("Distribution of", metric, "by Variant"),
         x = metric, y = "Density") +
    theme_minimal()
  plot_list_1[[metric]] <- p
}
print(plot_list_1)
## $day
##
## $typeI_init
##
## $typeI_fin
##
## $typeII_init
##
## $typeII_fin
##
## $agree_init
##
## $agree_fin
##
## $conflict_init
##
## $conflict_fin
##
## $revised_per_ai
##
## $revised_agst_ai
##
## $fully_complt
##
## $confidence_init_total
##
## $confidence_fin_total
##
## $complt_init
##
## $complt_fin
##
## $ai_typeI
##
## $ai_typeII
##
## $badloans_num
##
## $goodloans_num
## # A tibble: 138 × 22
## Variant loanofficer_id day typeI_init typeI_fin typeII_init typeII_fin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Treatment qamcqdoe 1 0 0 2 2
## 2 Treatment 09pij0e2 1 3 2 0 1
## 3 Treatment 4cdwcblq 1 1 1 0 1
## 4 Treatment 7bx6hbg5 1 4 2 1 2
## 5 Treatment kmr3oifc 1 2 2 0 1
## 6 Treatment uybljp0c 1 1 1 0 0
## 7 Control 2udootyt 1 2 0 1 0
## 8 Control 2udootyt 2 4 0 1 0
## 9 Control 2udootyt 3 3 0 2 0
## 10 Control 2udootyt 4 3 0 2 0
## # ℹ 128 more rows
## # ℹ 15 more variables: agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>,
## # conflict_fin <dbl>, revised_per_ai <dbl>, revised_agst_ai <dbl>,
## # fully_complt <dbl>, confidence_init_total <dbl>,
## # confidence_fin_total <dbl>, complt_init <dbl>, complt_fin <dbl>,
## # ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>, goodloans_num <dbl>
# Rows where the initial completion count exceeds the final one
df_filtered_higher <- df %>% filter(complt_init > complt_fin)
print(df_filtered_higher)
## # A tibble: 96 × 22
## Variant loanofficer_id day typeI_init typeI_fin typeII_init typeII_fin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 2udootyt 1 2 0 1 0
## 2 Control 2udootyt 2 4 0 1 0
## 3 Control 2udootyt 3 3 0 2 0
## 4 Control 2udootyt 4 3 0 2 0
## 5 Control 2udootyt 5 3 0 0 0
## 6 Control 2udootyt 6 5 0 0 0
## 7 Control 2udootyt 7 5 0 0 0
## 8 Control 2udootyt 8 4 0 1 0
## 9 Control 2udootyt 9 4 0 0 0
## 10 Control 2udootyt 10 2 0 0 0
## # ℹ 86 more rows
## # ℹ 15 more variables: agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>,
## # conflict_fin <dbl>, revised_per_ai <dbl>, revised_agst_ai <dbl>,
## # fully_complt <dbl>, confidence_init_total <dbl>,
## # confidence_fin_total <dbl>, complt_init <dbl>, complt_fin <dbl>,
## # ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>, goodloans_num <dbl>
# Rows with all 10 loans completed initially but none completed finally
# (comma-separated filter conditions are combined with AND)
df_filtered_10 <- df %>% filter(complt_init == 10, complt_fin == 0)
print(df_filtered_10)
## # A tibble: 82 × 22
## Variant loanofficer_id day typeI_init typeI_fin typeII_init typeII_fin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 2udootyt 2 4 0 1 0
## 2 Control 2udootyt 3 3 0 2 0
## 3 Control 2udootyt 4 3 0 2 0
## 4 Control 2udootyt 5 3 0 0 0
## 5 Control 2udootyt 6 5 0 0 0
## 6 Control 2udootyt 7 5 0 0 0
## 7 Control 2udootyt 8 4 0 1 0
## 8 Control 2udootyt 9 4 0 0 0
## 9 Control 2udootyt 10 2 0 0 0
## 10 Control l31kzq2d 3 5 0 2 0
## # ℹ 72 more rows
## # ℹ 15 more variables: agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>,
## # conflict_fin <dbl>, revised_per_ai <dbl>, revised_agst_ai <dbl>,
## # fully_complt <dbl>, confidence_init_total <dbl>,
## # confidence_fin_total <dbl>, complt_init <dbl>, complt_fin <dbl>,
## # ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>, goodloans_num <dbl>
# Rows where the final completion count exceeds the initial one
df_filtered_lower <- df %>% filter(complt_init < complt_fin)
print(df_filtered_lower)
## # A tibble: 42 × 22
## Variant loanofficer_id day typeI_init typeI_fin typeII_init typeII_fin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Treatment qamcqdoe 1 0 0 2 2
## 2 Treatment 09pij0e2 1 3 2 0 1
## 3 Treatment 4cdwcblq 1 1 1 0 1
## 4 Treatment 7bx6hbg5 1 4 2 1 2
## 5 Treatment kmr3oifc 1 2 2 0 1
## 6 Treatment uybljp0c 1 1 1 0 0
## 7 Treatment 92vdohom 1 0 0 0 0
## 8 Treatment vflkw3iq 1 3 2 1 1
## 9 Treatment yc74rzbp 1 3 2 1 3
## 10 Treatment 1ckkyukp 1 1 3 0 2
## # ℹ 32 more rows
## # ℹ 15 more variables: agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>,
## # conflict_fin <dbl>, revised_per_ai <dbl>, revised_agst_ai <dbl>,
## # fully_complt <dbl>, confidence_init_total <dbl>,
## # confidence_fin_total <dbl>, complt_init <dbl>, complt_fin <dbl>,
## # ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>, goodloans_num <dbl>
# Keep only fully completed days (fully_complt == 10). This also drops
# rows where the completion columns are missing, because filter()
# excludes rows whose condition evaluates to NA.
df_1 <- filter(df, fully_complt == 10)
# Summarise the data per loan officer: the mean of every daily metric
# (excluding `day`), one row per (Variant, loanofficer_id).
# across() with .names = "{.col}_mean" produces exactly the same output
# columns as the nineteen hand-written mean() calls it replaces.
df_summary <- df_1 %>%
  group_by(Variant, loanofficer_id) %>%
  summarise(
    across(
      c(typeI_init, typeI_fin, typeII_init, typeII_fin,
        agree_init, agree_fin, conflict_init, conflict_fin,
        confidence_init_total, confidence_fin_total,
        revised_per_ai, revised_agst_ai,
        fully_complt, complt_init, complt_fin,
        ai_typeI, ai_typeII, goodloans_num, badloans_num),
      \(x) mean(x),
      .names = "{.col}_mean"
    ),
    .groups = "drop"
  )
print(df_summary)
## # A tibble: 38 × 21
## Variant loanofficer_id typeI_init_mean typeI_fin_mean typeII_init_mean
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Control 0g7pi6g8 3.44 3.89 1.44
## 2 Control 0gh7r2hr 2.33 2.44 1.56
## 3 Control bzeya726 2.44 2.33 1.44
## 4 Control dlpxpwdj 6 6 0.571
## 5 Control i6miisiq 5.62 5.5 0.875
## 6 Control p5g1bxa1 2.89 3.11 1.33
## 7 Control qwun9ha5 3.62 3.75 1.25
## 8 Control sarganjx 2.22 2.33 1.56
## 9 Control ugdh6i8o 4.56 5.22 0.778
## 10 Control uui3fiii 2.78 2.89 1.22
## # ℹ 28 more rows
## # ℹ 16 more variables: typeII_fin_mean <dbl>, agree_init_mean <dbl>,
## # agree_fin_mean <dbl>, conflict_init_mean <dbl>, conflict_fin_mean <dbl>,
## # confidence_init_total_mean <dbl>, confidence_fin_total_mean <dbl>,
## # revised_per_ai_mean <dbl>, revised_agst_ai_mean <dbl>,
## # fully_complt_mean <dbl>, complt_init_mean <dbl>, complt_fin_mean <dbl>,
## # ai_typeI_mean <dbl>, ai_typeII_mean <dbl>, goodloans_num_mean <dbl>, …
Aggregating to one row per loan officer reduces the sample size to 38, so we proceed without aggregation to retain more data and statistical power. Note, however, that the unaggregated daily rows are repeated measures of the same officers, so observations are not fully independent; this should be kept in mind when interpreting the t-tests below.
# Keep only fully completed days (fully_complt == 10) so the final-stage
# columns carry no missing values
df_2 <- filter(df, fully_complt == 10)
print(df_2)
## # A tibble: 330 × 22
## Variant loanofficer_id day typeI_init typeI_fin typeII_init typeII_fin
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Treatment qamcqdoe 2 2 2 3 3
## 2 Treatment qamcqdoe 3 3 3 0 0
## 3 Treatment qamcqdoe 4 1 2 1 1
## 4 Treatment qamcqdoe 5 0 2 0 0
## 5 Treatment qamcqdoe 6 0 1 4 0
## 6 Treatment qamcqdoe 7 0 1 1 0
## 7 Treatment qamcqdoe 8 0 3 4 1
## 8 Treatment qamcqdoe 9 0 1 4 1
## 9 Treatment qamcqdoe 10 0 0 2 1
## 10 Treatment 09pij0e2 2 1 1 3 3
## # ℹ 320 more rows
## # ℹ 15 more variables: agree_init <dbl>, agree_fin <dbl>, conflict_init <dbl>,
## # conflict_fin <dbl>, revised_per_ai <dbl>, revised_agst_ai <dbl>,
## # fully_complt <dbl>, confidence_init_total <dbl>,
## # confidence_fin_total <dbl>, complt_init <dbl>, complt_fin <dbl>,
## # ai_typeI <dbl>, ai_typeII <dbl>, badloans_num <dbl>, goodloans_num <dbl>
# Create new features ---------------------------------------------------------
# All derived columns are built in a single mutate() call; the arithmetic
# is identical to assigning each column with df_2$... one at a time.
df_2 <- df_2 %>%
  mutate(
    # "Recall" before and after seeing the AI recommendation.
    # NOTE(review): this is badloans / (badloans + typeII), not the
    # textbook TP / (TP + FN) — confirm the intended definition. The
    # ratio is NaN whenever the denominator is 0 (ggplot later warns
    # about such non-finite rows).
    recall_init = badloans_num / (badloans_num + typeII_init),
    recall_fin = badloans_num / (badloans_num + typeII_fin),
    # "Precision" before and after (typeI errors in the denominator)
    precision_init = badloans_num / (badloans_num + typeI_init),
    precision_fin = badloans_num / (badloans_num + typeI_fin),
    # Improvement of recall and precision after the model
    recall_imp = recall_fin - recall_init,
    precision_imp = precision_fin - precision_init,
    # Conflict rate per completed review (non-finite when complt_* is 0)
    conflict_init_rate = conflict_init / complt_init,
    conflict_fin_rate = conflict_fin / complt_fin,
    # Decline of the conflict rate
    conflict_dec = conflict_init_rate - conflict_fin_rate,
    # Improvement of confidence
    confidence_imp = confidence_fin_total - confidence_init_total,
    # The confidence totals can reach 1000, so scale confidence_imp
    # (divide by 1000) before log-transforming it
    log_confidence_imp = log(confidence_imp / 1000 + 1),
    # Min-max scale the log-transformed improvement onto [0, 1]
    scle_log_confidence_imp =
      (log_confidence_imp - min(log_confidence_imp)) /
        (max(log_confidence_imp) - min(log_confidence_imp))
  )
# Show the distribution of each variable again, now including the newly
# derived features. Numeric columns are selected by type rather than by
# position, so the loop is robust to column reordering.
plot_list_3 <- list()
numeric_columns_3 <- names(df_2)[vapply(df_2, is.numeric, logical(1))]
for (metric in numeric_columns_3) {
  p <- ggplot(df_2, aes(x = .data[[metric]], fill = Variant)) +
    geom_density(alpha = 0.5) +
    labs(title = paste("Distribution of", metric, "by Variant"),
         x = metric, y = "Density") +
    theme_minimal()
  plot_list_3[[metric]] <- p
}
print(plot_list_3)
## $day
##
## $typeI_init
##
## $typeI_fin
##
## $typeII_init
##
## $typeII_fin
##
## $agree_init
##
## $agree_fin
##
## $conflict_init
##
## $conflict_fin
##
## $revised_per_ai
##
## $revised_agst_ai
##
## $fully_complt
##
## $confidence_init_total
##
## $confidence_fin_total
##
## $complt_init
##
## $complt_fin
##
## $ai_typeI
##
## $ai_typeII
##
## $badloans_num
##
## $goodloans_num
##
## $recall_init
## Warning: Removed 26 rows containing non-finite outside the scale range
## (`stat_density()`).
##
## $recall_fin
## Warning: Removed 26 rows containing non-finite outside the scale range
## (`stat_density()`).
##
## $precision_init
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
##
## $precision_fin
##
## $recall_imp
## Warning: Removed 26 rows containing non-finite outside the scale range
## (`stat_density()`).
##
## $precision_imp
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
##
## $conflict_init_rate
##
## $conflict_fin_rate
##
## $conflict_dec
##
## $confidence_imp
##
## $log_confidence_imp
##
## $scle_log_confidence_imp
# Welch two-sample t-tests (unequal variances) comparing Control vs
# Treatment on each outcome metric; the repeated call is factored into
# a small helper.
welch_test <- function(fml) t.test(fml, data = df_2, var.equal = FALSE)
t_tests_welch_2 <- list(
  recall_imp        = welch_test(recall_imp ~ Variant),
  precision_imp     = welch_test(precision_imp ~ Variant),
  conflict_rate_dec = welch_test(conflict_dec ~ Variant),
  confidence_imp    = welch_test(scle_log_confidence_imp ~ Variant)
)
print(t_tests_welch_2)
## $recall_imp
##
## Welch Two Sample t-test
##
## data: recall_imp by Variant
## t = -4.0118, df = 265.83, p-value = 7.837e-05
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.08089470 -0.02763192
## sample estimates:
## mean in group Control mean in group Treatment
## 0.001508859 0.055772171
##
##
## $precision_imp
##
## Welch Two Sample t-test
##
## data: precision_imp by Variant
## t = -3.1907, df = 304.66, p-value = 0.001567
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.06778803 -0.01607042
## sample estimates:
## mean in group Control mean in group Treatment
## -0.01683739 0.02509183
##
##
## $conflict_rate_dec
##
## Welch Two Sample t-test
##
## data: conflict_dec by Variant
## t = -5.1299, df = 246.26, p-value = 5.878e-07
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.10590730 -0.04714265
## sample estimates:
## mean in group Control mean in group Treatment
## 0.03372093 0.11024590
##
##
## $confidence_imp
##
## Welch Two Sample t-test
##
## data: scle_log_confidence_imp by Variant
## t = -2.0186, df = 281.22, p-value = 0.04448
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.0508259430 -0.0006396344
## sample estimates:
## mean in group Control mean in group Treatment
## 0.4164260 0.4421588
Welch t-tests show that the Treatment group had a significantly larger recall improvement (p < 0.001) and precision improvement (p < 0.01) than Control (the negative t-statistics reflect Control minus Treatment), indicating better AI-assisted decision-making. The conflict rate declined significantly more under Treatment (p < 0.001), meaning loan officers aligned more with AI recommendations. Confidence improved slightly more under Treatment (p < 0.05), but the effect is small.
# Run PCA to inform the variable weighting in the OEC.
# Load FactoMineR quietly; suppressPackageStartupMessages() targets the
# startup chatter specifically instead of blanket-suppressing warnings.
suppressPackageStartupMessages(library(FactoMineR))
oec_vars <- c("recall_imp", "precision_imp", "conflict_dec",
              "scle_log_confidence_imp")
# Run PCA on complete cases only. PCA() otherwise mean-imputes missing
# values (it warns about this), which shrinks variance toward zero and
# biases the eigenvalues.
pca_input <- df_2[complete.cases(df_2[, oec_vars]), oec_vars]
pca_model <- PCA(pca_input, scale.unit = TRUE, graph = FALSE)
print(pca_model$eig)
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 1.3909256 34.77314 34.77314
## comp 2 1.1556634 28.89159 63.66473
## comp 3 0.9397291 23.49323 87.15795
## comp 4 0.5136819 12.84205 100.00000
According to the PCA result: the first principal component explains 34.77% of the variance, the second 28.89% (63.66% cumulative), the third 23.49% (87.16% cumulative), and the fourth only 12.84%. Note that each principal component is a linear combination of all four variables, not a single variable — identifying PC1 with recall_imp, PC2 with precision_imp, and so on is an approximation that should be checked against the component loadings before it is used to justify the weights below.
# Build the OEC as a weighted sum of recall_imp, precision_imp,
# conflict_dec and scle_log_confidence_imp, with weights taken from the
# PCA variance percentages above.
# NOTE(review): these weights map eigenvalue percentages onto variables
# in column order; since each PC mixes all four variables, confirm this
# weighting scheme is intended.
oec_weights <- c(
  recall_imp              = 0.3477,
  precision_imp           = 0.289,
  conflict_dec            = 0.2349,
  scle_log_confidence_imp = 0.1284
)
# Weighted sum via a matrix product; rows with any NA component yield NA,
# exactly as the elementwise sum would.
df_2$oec <- as.numeric(
  as.matrix(df_2[, names(oec_weights)]) %*% oec_weights
)
# Welch t-test on the OEC
t.test(oec ~ Variant, data = df_2, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: oec by Variant
## t = -6.7623, df = 293.79, p-value = 7.327e-11
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.06921005 -0.03800635
## sample estimates:
## mean in group Control mean in group Treatment
## 0.05704877 0.11065697
The Welch t-test shows that Treatment significantly improved the Overall Evaluation Criterion (OEC) (p < 0.001), with the Treatment group (mean = 0.1107) performing notably better than the Control group (mean = 0.0570). The 95% confidence interval for the Control minus Treatment difference, [-0.0692, -0.0380], lies entirely below zero, confirming a meaningful positive impact of the AI intervention.
# Per-variant means of the OEC and its components, then the
# Treatment - Control differences, both absolute and as a percentage of
# the (absolute) Control mean.
variant_means <- df_2 %>%
  group_by(Variant) %>%
  summarise(
    OEC_mean = mean(oec, na.rm = TRUE),
    recall_imp_mean = mean(recall_imp, na.rm = TRUE),
    precision_imp_mean = mean(precision_imp, na.rm = TRUE),
    conflict_dec_mean = mean(conflict_dec, na.rm = TRUE),
    scle_log_confidence_imp_mean = mean(scle_log_confidence_imp, na.rm = TRUE)
  )
trt <- variant_means %>% filter(Variant == "Treatment")
ctl <- variant_means %>% filter(Variant == "Control")
# tibble() evaluates columns sequentially, so the Perc_* columns can
# reference the Diff_* columns defined just above them.
pairwise_diff <- tibble(
  Diff_Treatment_Control = trt$OEC_mean - ctl$OEC_mean,
  Diff_recall_imp = trt$recall_imp_mean - ctl$recall_imp_mean,
  Diff_precision = trt$precision_imp_mean - ctl$precision_imp_mean,
  Diff_Conflict = trt$conflict_dec_mean - ctl$conflict_dec_mean,
  Diff_Confidence =
    trt$scle_log_confidence_imp_mean - ctl$scle_log_confidence_imp_mean,
  Perc_Treatment_Control = (Diff_Treatment_Control / abs(ctl$OEC_mean)) * 100,
  Perc_recall = (Diff_recall_imp / abs(ctl$recall_imp_mean)) * 100,
  Perc_precision = (Diff_precision / abs(ctl$precision_imp_mean)) * 100,
  Perc_Conflict = (Diff_Conflict / abs(ctl$conflict_dec_mean)) * 100,
  Perc_Confidence =
    (Diff_Confidence / abs(ctl$scle_log_confidence_imp_mean)) * 100
)
# View pairwise differences
print(pairwise_diff)
## # A tibble: 1 × 10
## Diff_Treatment_Control Diff_recall_imp Diff_precision Diff_Conflict
## <dbl> <dbl> <dbl> <dbl>
## 1 0.0536 0.0543 0.0419 0.0765
## # ℹ 6 more variables: Diff_Confidence <dbl>, Perc_Treatment_Control <dbl>,
## # Perc_recall <dbl>, Perc_precision <dbl>, Perc_Conflict <dbl>,
## # Perc_Confidence <dbl>
According to these results, Treatment significantly improved the OEC (p < 0.001) — a 93.9% increase relative to the Control mean. The reduced conflict rate and increased confidence indicate that the Treatment improved the efficiency and stability of the approval process.
# Show the absolute differences plot.
# Reshape to long format and draw a single geom_col layer instead of
# stacking five separate geom_bar(stat = "identity") layers — same bars,
# one aesthetic mapping.
abs_diff_long <- tibble(
  Metric = c("OEC", "Recall", "Precision", "Conflict", "Confidence"),
  Difference = c(pairwise_diff$Diff_Treatment_Control,
                 pairwise_diff$Diff_recall_imp,
                 pairwise_diff$Diff_precision,
                 pairwise_diff$Diff_Conflict,
                 pairwise_diff$Diff_Confidence)
)
ggplot(abs_diff_long, aes(x = Metric, y = Difference)) +
  geom_col(fill = "steelblue") +
  labs(title = "Pairwise Differences (Absolute)", x = "Metric", y = "Difference") +
  theme_minimal()
# Show the percentage differences plot.
# Long-format data plus a single geom_col replaces the five stacked
# geom_bar(stat = "identity") layers of the original.
perc_diff_long <- tibble(
  Metric = c("OEC", "Recall", "Precision", "Conflict", "Confidence"),
  Percentage = c(pairwise_diff$Perc_Treatment_Control,
                 pairwise_diff$Perc_recall,
                 pairwise_diff$Perc_precision,
                 pairwise_diff$Perc_Conflict,
                 pairwise_diff$Perc_Confidence)
)
ggplot(perc_diff_long, aes(x = Metric, y = Percentage)) +
  geom_col(fill = "steelblue") +
  labs(title = "Pairwise Differences (Percentage)", x = "Metric", y = "Percentage Change (%)") +
  theme_minimal()
# Effect size (Cohen's d) of the OEC difference between Treatment and
# Control. Use <- for assignment (not =) at the top level.
Control <- df_2$oec[df_2$Variant == "Control"]
Treatment <- df_2$oec[df_2$Variant == "Treatment"]
d_oec <- cohens_d(Treatment, Control)
print(d_oec)
# Interpret the computed estimate rather than a hand-copied constant, so
# the interpretation cannot drift out of sync with the data.
effectsize::interpret_cohens_d(d_oec$Cohens_d)
Treatment significantly improved the Overall Evaluation Criterion (p < 0.001, d = 0.65, a medium effect) compared to Control — a 93.9% relative increase. The positive impact of AI on recall and precision has both statistical and practical significance, considering the p-values, the Cohen's d measure, and the percentage change. Thus, the AI model should be implemented given the statistically significant and practically meaningful improvements in decision accuracy and alignment with AI recommendations.
# A priori power analysis: sample size per group required to detect a
# medium effect (Cohen's d = 0.5) with 80% power at alpha = 0.05 in a
# two-sample t-test.
pwr.t.test(
  d = 0.5,            # assumed effect size (Cohen's d)
  sig.level = 0.05,   # Type I error threshold
  power = 0.80,       # desired power
  type = "two.sample"
)
##
## Two-sample t test power calculation
##
## n = 63.76561
## d = 0.5
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
Power analysis indicates that with a moderate effect size (Cohen's d = 0.5), approximately 64 observations per group are required to achieve 80% power at a 5% significance level — an 80% chance of detecting a true effect of that size while keeping the Type I error rate at 5%. With 330 rows in df_2 (roughly 165 per group), the study exceeds this requirement, though the repeated daily measures per loan officer mean the effective number of independent observations is smaller than the raw row count.